import requests

from bs4 import BeautifulSoup
import mysql.connector

# English month abbreviations in calendar order, one quarter per row.
# An entry's list index + 1 is its month number.
monList = [
    "Jan", "Feb", "Mar",
    "Apr", "May", "Jun",
    "Jul", "Aug", "Sep",
    "Oct", "Nov", "Dec",
]

def pa(p):
    """Scrape one page of the recruitment listing and store every posting.

    The page is fetched, each ``<dt class="typeL">`` entry is reduced to a
    (title, school name, date) triple, the triple is printed as
    ``date|title`` and then passed to ``mysql_conn``.

    Entries whose date or title cannot be extracted are reported on stdout
    and skipped, so one malformed entry does not abort the crawl and raw,
    unparsed text is never written to the database as a date.

    Args:
        p: Page number for the ``pageNo`` query parameter. A string or an
            int is accepted.

    Raises:
        requests.exceptions.RequestException: if the page cannot be fetched,
            including when the server does not answer within the timeout.
    """
    # Page number, printed as a progress indicator.
    print(p)
    # School name stored with every row.
    schoolName = "吉林大学"

    # Listing URL for the requested page.
    url = ("https://jdjyw.jlu.edu.cn/portal/recruit/list?type=2&pageNo="
           + str(p) + "&pageSize=20")

    # NOTE(review): verify=False disables TLS certificate verification and is
    # kept only to preserve existing behaviour -- confirm it is still needed.
    # The timeout keeps a stalled connection from hanging the crawl forever.
    html = requests.get(url=url, verify=False, timeout=30)
    html.encoding = "utf-8"

    soup = BeautifulSoup(html.text, 'lxml')

    # Every posting on the page is a <dt class="typeL"> tag.
    for data in soup.find_all("dt", class_="typeL"):
        date_tag = data.find("span", class_="fr")
        link_tag = data.find("a")
        if date_tag is None or link_tag is None:
            print("skipped entry without date or link on page " + str(p))
            continue

        createTime = _parse_create_time(date_tag.text)
        if createTime is None:
            print("skipped entry with unparsable date: " + repr(date_tag.text))
            continue

        jobTitle = _extract_title(link_tag.text)
        print(createTime + "|" + jobTitle)
        mysql_conn(jobTitle, schoolName, createTime)


def _parse_create_time(raw):
    """Turn the listing's date text into ``year-month-day``, or return None.

    The split logic presumes text shaped like ``[Mar 5, 2020 ...`` (shape
    inferred from the parsing code -- confirm against the live page). Month
    and day are not zero-padded, e.g. ``2020-3-5``.
    """
    try:
        year = raw.split(", ")[1].split(" ")[0]
        month = raw.split("[")[1].split(" ")[0]
        day = raw.split(",")[0].split(" ")[1]
    except IndexError:
        return None

    # An unknown month abbreviation means the text is not a date we understand.
    if month not in monList:
        return None
    return "{}-{}-{}".format(year, monList.index(month) + 1, day)


def _extract_title(link_text):
    """Return the job title from the link text with surrounding whitespace removed."""
    # The title normally follows a fixed run of indentation spaces inside the
    # link text; fall back to the whole text when that run is absent.
    indent = "                    "
    parts = link_text.split(indent)
    title = parts[1] if len(parts) > 1 else link_text
    return title.strip()

# Database access
def mysql_conn(job_title,school_name,create_time):
    """Insert one job posting into the ``h_job`` table.

    A new connection to the local ``hschool_job`` database is opened for the
    call and is always closed again, together with its cursor, even when the
    insert fails. This keeps a long crawl from leaking one connection per row.

    Args:
        job_title: Value for the ``job_title`` column.
        school_name: Value for the ``school_name`` column.
        create_time: Value for the ``create_time`` column.
    """
    mydb = mysql.connector.connect(
        host="127.0.0.1",
        user="root",
        passwd="",
        database="hschool_job"
    )
    try:
        mycursor = mydb.cursor()
        try:
            # Parameterized insert: the driver handles quoting of the values.
            sql = "INSERT INTO h_job (job_title,school_name,create_time) VALUES (%s,%s,%s)"
            val = (job_title,school_name,create_time)
            mycursor.execute(sql, val)
            mydb.commit()
        finally:
            mycursor.close()
    finally:
        mydb.close()

if __name__ == '__main__':
    # Crawl listing pages 647 through 1653, passing each page number as a string.
    # NOTE(review): the original note here read "1-1654"; range() excludes its
    # stop value, so page 1654 is not fetched -- confirm whether that is intended.
    for page_no in map(str, range(647, 1654)):
        pa(page_no)
